home *** CD-ROM | disk | FTP | other *** search
- '''
- Parent class of all parsers
- '''
-
- import hachoir_core
- import hachoir_editor
-
- import os
-
- import mat
-
# File extensions listed together with the kind of content each one holds.
# NOTE(review): the name suggests these formats are treated as carrying no
# metadata; this tuple is not used in this part of the file - confirm
# against the callers.
NOMETA = (
    '.bmp',   # image
    '.rdf',   # text
    '.txt',   # plain text
    '.xml',   # formatted text
    '.rels',  # OpenXML formatted text
)


# Unique sentinel compared by identity: when _should_remove() returns it,
# GenericParser descends into the field instead of treating it as a leaf.
FIELD = object()
-
class GenericParser(object):
    '''
    Parent class of all parsers.

    Subclasses must implement _should_remove(); every other method walks
    the editable field tree (self.editor) and acts on what that hook
    returns for each field:
      - True  : the field itself is harmful (report it / delete it)
      - FIELD : the field is a container that must be walked recursively
      - anything else : the field is left alone
    '''
    def __init__(self, filename, parser, mime, backup, add2archive):
        '''
        filename    : path of the file to process
        parser      : parser object handed to hachoir_editor.createEditor
        mime        : stored as-is in self.mime
        backup      : when exactly False, do_backup() replaces the original
                      file with the cleaned one
        add2archive : accepted for interface compatibility; not used by
                      this base class
        '''
        self.parser = parser
        self.mime = mime
        self.backup = backup
        self.editor = hachoir_editor.createEditor(parser)
        self.realname = filename
        try:
            self.filename = hachoir_core.cmd_line.unicodeFilename(filename)
        except TypeError:  # get rid of "decoding Unicode is not supported"
            self.filename = filename
        basename, ext = os.path.splitext(filename)
        # The cleaned copy is written next to the original file.
        self.output = basename + '.cleaned' + ext
        self.basename = os.path.basename(filename)  # only filename

    def is_clean(self):
        '''
        Check if the file is clean from harmful metadata.

        Return False as soon as one harmful field is found anywhere in
        the field tree, True otherwise.
        '''
        # _is_clean() already returns True when no top-level field is
        # flagged, so a separate pre-scan of the top level is unnecessary.
        return self._is_clean(self.editor)

    def _is_clean(self, fieldset):
        '''
        Recursive helper of is_clean(): True if neither fieldset nor any
        nested container holds a field flagged True by _should_remove().
        '''
        for field in fieldset:
            remove = self._should_remove(field)
            if remove is True:
                return False
            if remove is FIELD and not self._is_clean(field):
                return False
        return True

    def remove_all(self):
        '''
        Remove all the compromising fields, write the result to
        self.output, then call do_backup().

        Return True if every removal succeeded, False otherwise. The
        output file is written in both cases.
        '''
        state = self._remove_all(self.editor)
        hachoir_core.field.writeIntoFile(self.editor, self.output)
        self.do_backup()
        return state

    def _remove_all(self, fieldset):
        '''
        Recursive helper of remove_all().

        Delete every field of fieldset flagged True and descend into
        every field flagged FIELD. Return False if an error occurred
        here or in any nested container, True otherwise.
        '''
        state = True
        try:
            for field in fieldset:
                remove = self._should_remove(field)
                if remove is True:
                    self._remove(fieldset, field.name)
                elif remove is FIELD:
                    # Keep processing the siblings after a nested
                    # failure, but remember it so it is reported.
                    if not self._remove_all(field):
                        state = False
        except Exception:
            # Best effort: report the failure instead of propagating it,
            # without swallowing KeyboardInterrupt/SystemExit.
            return False
        return state

    def _remove(self, fieldset, field):
        '''
        Delete the entry of fieldset stored under the key field
        (callers in this class pass the field's name).
        '''
        del fieldset[field]

    def get_meta(self):
        '''
        Return a dict mapping the name of every harmful field to its
        value.
        '''
        metadata = {}
        self._get_meta(self.editor, metadata)
        return metadata

    def _get_meta(self, fieldset, metadata):
        '''
        Recursive helper of get_meta(): fill metadata in place with the
        harmful fields found in fieldset and its nested containers.
        '''
        for field in fieldset:
            remove = self._should_remove(field)
            if remove is True:
                try:
                    metadata[field.name] = field.value
                except Exception:
                    # The value could not be read: still report the field.
                    metadata[field.name] = 'harmful content'
            elif remove is FIELD:
                # Pass the accumulator down so nested fields are collected.
                self._get_meta(field, metadata)

    def _should_remove(self, key):
        '''
        Return True if the field is compromising, FIELD if it is a
        container to walk recursively, any other value to keep it.

        Abstract method: must be overridden by subclasses.
        '''
        raise NotImplementedError

    def do_backup(self):
        '''
        Keep or drop the original file.

        If no backup was asked (self.backup is False), securely remove
        the original file and move the cleaned output in its place.
        Otherwise both the original and the cleaned file are left as
        they are.
        '''
        if self.backup is False:
            mat.secure_remove(self.filename)
            os.rename(self.output, self.filename)
-